##################################################################################################  
### Training Activists in Nonviolent Action Increases Self-Efficacy and Reduces State Violence ###
### Survey Replication ########################################################################### 
### Consuelo Amat, Jonathan Pinckney, and Jawhratelkmal Kanu #####################################
### October 12, 2025 #############################################################################
##################################################################################################  

#Load packages: 
library(rstudioapi)
library(readxl)
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
library(tibble)
library(ggplot2)
library(gridExtra)
library(sandwich)
library(lmtest)

# Set Working Directory to Directory where script is saved
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) 

#Load data
sudan.survey.data <- readxl::read_excel("survey_with_ids.xlsx") 

## Subset data to only analyze pre and post survey, excluding follow up responses: 
prepost <- subset(sudan.survey.data, round != "follow_up")

#Remove missing data in ADM2_PCODE (municipality) variable: 
prepost <- prepost %>% 
  drop_na(ADM2_PCODE)


#######################################################################################
######### TESTING FOR BALANCE BETWEEN BASELINE CONTROL AND BASELINE TREATMENT #########
#######################################################################################

#Inputs: 
df          <- prepost
round_var   <- "round"          # column with survey wave
treat_var   <- "treatment"      # column with baseline assignment
cluster_var <- "ADM2_PCODE"     # district id for clustering

#Survey outcome variables:
outcomes <- c(
  "tact_violent_std",
  "perc_importanceofpeace_std",
  "perc_mosteffect_std",
  "perc_successofpeace_std",
  "att_retainviolence_std",
  "att_elite_std",
  "att_street_std",
  "att_negot_std",
  "conf_ind_scheme_std",
  "conf_ind_vision_std",
  "conf_ind_analysis_std",
  "conf_ind_dial_work_std",
  "conf_ind_dial_allies_std",
  "conf_ind_dial_disag_std",
  "conf_ind_solveprob_std",
  "conf_ind_goal_std",
  "conf_group_solveprob_std",
  "conf_group_goal_std"
)

#Formatting p-values
fmt_p <- function(p) ifelse(is.na(p), NA_character_,
                            ifelse(p < 0.001, "< 0.001", sprintf("%.3f", p)))

stopifnot(round_var %in% names(df), treat_var %in% names(df))

#Create a robust treat01 (0=control, 1=treat)
df <- df %>%
  mutate(
    .treat_str = if (is.numeric(.data[[treat_var]])) {
      ifelse(.data[[treat_var]] == 1, "treat", "control")
    } else {
      tolower(as.character(.data[[treat_var]]))
    },
    .treat01 = ifelse(str_detect(.treat_str, "treat|1|yes|true"), 1L, 0L)
  )

#Cross-tab round x treat to see counts
tab_rt <- df %>%
  mutate(.round_chr = as.character(.data[[round_var]])) %>%
  count(.round_chr, .treat01, name = "n") %>%
  tidyr::complete(.round_chr, .treat01 = c(0L,1L), fill = list(n = 0)) %>%
  group_by(.round_chr) %>%
  mutate(n_total = sum(n)) %>%
  ungroup()

#Pick baseline:
round_levels <- unique(as.character(df[[round_var]]))
baseline_level <- NULL
has_literal_baseline <- any(tolower(round_levels) == "baseline")
if (has_literal_baseline) {
  baseline_level <- round_levels[tolower(round_levels) == "baseline"][1]
} else {
  cand <- tab_rt %>%
    group_by(.round_chr) %>%
    summarise(n_ctrl = n[.treat01 == 0L],
              n_treat = n[.treat01 == 1L],
              min_pair = min(n_ctrl, n_treat),
              n_total = unique(n_total),
              .groups = "drop") %>%
    arrange(desc(min_pair), desc(n_total))
  baseline_level <- cand$.round_chr[1]
}

#Subset to baseline
base <- df %>% filter(as.character(.data[[round_var]]) == baseline_level)

#Clustered vcov: 
vcov_cluster <- function(mod) {
  if (cluster_var %in% names(model.frame(mod))) {
    sandwich::vcovCL(mod, cluster = reformulate(cluster_var), type = "HC1")
  } else {
    sandwich::vcovCL(mod, cluster = base[[cluster_var]], type = "HC1")
  }
}

#Run baseline balance tests (difference in means with clustered SEs)
balance <- map_dfr(use_outcomes, function(y) {
  if (!is.numeric(base[[y]]) || all(is.na(base[[y]]))) {
    return(tibble(outcome = y, n = sum(!is.na(base[[y]])),
                  n_ctrl = sum(base$.treat01 == 0 & !is.na(base[[y]])),
                  n_treat = sum(base$.treat01 == 1 & !is.na(base[[y]])),
                  estimate = NA_real_, std.error = NA_real_,
                  statistic = NA_real_, p_value = NA_real_))
  }
  fit <- lm(reformulate(".treat01", response = y), data = base)
  V   <- vcov_cluster(fit)
  ct  <- coeftest(fit, vcov. = V)
  
  rn <- rownames(ct); idx <- which(rn != "(Intercept)")[1]
  tibble(
    outcome   = y,
    n         = nobs(fit),
    n_ctrl    = sum(base$.treat01 == 0 & complete.cases(base[[y]])),
    n_treat   = sum(base$.treat01 == 1 & complete.cases(base[[y]])),
    estimate  = unname(ct[idx, "Estimate"]),    
    std.error = unname(ct[idx, "Std. Error"]),
    statistic = unname(ct[idx, "t value"]),
    p_value   = unname(ct[idx, "Pr(>|t|)"])
  )
})

#output
balance <- balance %>%
  mutate(p_fmt = fmt_p(p_value)) %>%
  arrange(outcome)

balance



##############################
######### OLS MODELS #########
##############################


#### Family 1 Survey Questions: On Nonviolent Action, Violence, and Peacebuilding

## att_retainviolence: support for your movement to retain option to use violence

#Making variable numeric
prepost$att_retainviolence <- as.numeric(as.character(prepost$att_retainviolence))

#Standardize:
mean_value <- mean(prepost$att_retainviolence, na.rm = TRUE) 
sd_value <- sd(prepost$att_retainviolence, na.rm = TRUE)  
prepost$att_retainviolence_std <- (prepost$att_retainviolence - mean_value) / sd_value 

#Reference group
prepost$round <- factor(prepost$round, levels = c("baseline","post_ws"))

#OLS model
LM1.1_retainviolence <- lm(att_retainviolence_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM1.1_retainviolence, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM1.1_retainviolence, vcov. = cov_clust)
ci <- lmtest::coefci(LM1.1_retainviolence, vcov. = cov_clust, level = 0.95)


#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM1.1_retainviolence)
df <- df.residual(LM1.1_retainviolence)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



## tact_violent: support for others in their movement using violence

#Making variable numeric
prepost$tact_violent <- as.numeric(as.character(prepost$tact_violent))

# Replace values of 6 (prefer not to answer) and 7 (don't know) with NA
prepost$tact_violent[prepost$tact_violent == 6 | prepost$tact_violent == 7] <- NA

#Standardize:
mean_value <- mean(prepost$tact_violent, na.rm = TRUE) 
sd_value <- sd(prepost$tact_violent, na.rm = TRUE)  
prepost$tact_violent_std <- (prepost$tact_violent - mean_value) / sd_value 

#OLS model
LM1.2_tact_violent <- lm(tact_violent_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM1.2_tact_violent, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM1.2_tact_violent, vcov. = cov_clust)
ci <- lmtest::coefci(LM1.2_tact_violent, vcov. = cov_clust, level = 0.95)


#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM1.2_tact_violent)
df <- df.residual(LM1.2_tact_violent)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)


## perc_importanceofpeace: does it matter that movement remain peaceful?

# Making variable to numeric
prepost$perc_importanceofpeace <- as.numeric(as.character(prepost$perc_importanceofpeace))
# Exclude answers that are don't know (value 5)
prepost$perc_importanceofpeace[prepost$perc_importanceofpeace == 5] <- NA

#Standardize:
mean_value <- mean(prepost$perc_importanceofpeace, na.rm = TRUE) 
sd_value <- sd(prepost$perc_importanceofpeace, na.rm = TRUE)  
prepost$perc_importanceofpeace_std <- (prepost$perc_importanceofpeace - mean_value) / sd_value 

#OLS model
LM1.3_importanceofpeace <- lm(perc_importanceofpeace_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM1.3_importanceofpeace, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM1.3_importanceofpeace, vcov. = cov_clust)
ci <- lmtest::coefci(LM1.3_importanceofpeace, vcov. = cov_clust, level = 0.95)


#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM1.3_importanceofpeace)
df <- df.residual(LM1.3_importanceofpeace)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



## perc_mosteffect: what is the most effective strategy against repressive regimes?

#Making variable numeric
prepost$perc_mosteffect <- as.numeric(as.character(prepost$perc_mosteffect))
#Exclude answers that are "other" (value 3) and "don't know" (value 4)
prepost$perc_mosteffect[prepost$perc_mosteffect == 3 | prepost$perc_mosteffect == 4] <- NA

#Standardize:
mean_value <- mean(prepost$perc_mosteffect, na.rm = TRUE) 
sd_value <- sd(prepost$perc_mosteffect, na.rm = TRUE)  
prepost$perc_mosteffect_std <- (prepost$perc_mosteffect - mean_value) / sd_value 

#OLS model
LM1.4_mosteffect <- lm(perc_mosteffect_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM1.4_mosteffect, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM1.4_mosteffect, vcov. = cov_clust)
ci <- lmtest::coefci(LM1.4_mosteffect, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM1.4_mosteffect)
df <- df.residual(LM1.4_mosteffect)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



## perc_successofpeace: in your opinion, how successful are peaceful movements 
# around the world against repressive regimes?

#Making variable numeric
prepost$perc_successofpeace <- as.numeric(as.character(prepost$perc_successofpeace))

# Exclude answers that are "don't know" (value 5)
prepost$perc_successofpeace[prepost$perc_successofpeace == 5] <- NA

#Standardize:
mean_value <- mean(prepost$perc_successofpeace, na.rm = TRUE) 
sd_value <- sd(prepost$perc_successofpeace, na.rm = TRUE)  
prepost$perc_successofpeace_std <- (prepost$perc_successofpeace - mean_value) / sd_value 

#OLS model
LM1.5_successofpeace <- lm(perc_successofpeace_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM1.5_successofpeace, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM1.5_successofpeace, vcov. = cov_clust)
ci <- lmtest::coefci(LM1.5_successofpeace, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM1.5_successofpeace)
df <- df.residual(LM1.5_successofpeace)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



#### General Questions: Belief about possibilities to make bottom up change

## att_elite: change is impossible if elites hold all the power

# Making variable numeric
prepost$att_elite <- as.numeric(as.character(prepost$att_elite))

#Standardize:
mean_value <- mean(prepost$att_elite, na.rm = TRUE) 
sd_value <- sd(prepost$att_elite, na.rm = TRUE)  
prepost$att_elite_std <- (prepost$att_elite - mean_value) / sd_value 

#OLS model
LM2.1_elite <- lm(att_elite_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM2.1_elite, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM2.1_elite, vcov. = cov_clust)
ci <- lmtest::coefci(LM2.1_elite, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM2.1_elite)
df <- df.residual(LM2.1_elite)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)




## att_street: street protests present an obstacle to negotiations

# Making variable numeric
prepost$att_street <- as.numeric(as.character(prepost$att_street))

#Standardize: 
mean_value <- mean(prepost$att_street, na.rm = TRUE) 
sd_value <- sd(prepost$att_street, na.rm = TRUE)  
prepost$att_street_std <- (prepost$att_street - mean_value) / sd_value 

#OLS model
LM2.2_street <- lm(att_street_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM2.2_street, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM2.2_street, vcov. = cov_clust)
ci <- lmtest::coefci(LM2.2_street, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM2.2_street)
df <- df.residual(LM2.2_street)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)


## att_negot: negotiations can address root causes of conflict

# Making variable numeric
prepost$att_negot <- as.numeric(as.character(prepost$att_negot))

#Standardize: 
mean_value <- mean(prepost$att_negot, na.rm = TRUE)
sd_value <- sd(prepost$att_negot, na.rm = TRUE)  
prepost$att_negot_std <- (prepost$att_negot - mean_value) / sd_value 

#OLS model
LM2.3_negot <- lm(att_negot_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM2.3_negot, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM2.3_negot, vcov. = cov_clust)
ci <- lmtest::coefci(LM2.3_negot, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM2.3_negot)
df <- df.residual(LM2.3_negot)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)




#### Family 2 Survey Questions: General Self- and Group Efficacy

## conf_ind_solveprob: self-efficacy when it comes to solving problems

# Making variable numeric
prepost$conf_ind_solveprob <- as.numeric(as.character(prepost$conf_ind_solveprob))

#Standardize:
mean_value <- mean(prepost$conf_ind_solveprob, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_solveprob, na.rm = TRUE)  
prepost$conf_ind_solveprob_std <- (prepost$conf_ind_solveprob - mean_value) / sd_value 

#OLS model
LM3.1_conf_ind <- lm(conf_ind_solveprob_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM3.1_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM3.1_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM3.1_conf_ind, vcov. = cov_clust, level = 0.95)


#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM3.1_conf_ind)
df <- df.residual(LM3.1_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



## conf_ind_goal: self-efficacy when it comes to achieving goals

# Making variable numeric
prepost$conf_ind_goal <- as.numeric(as.character(prepost$conf_ind_goal))

#Standardize:
mean_value <- mean(prepost$conf_ind_goal, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_goal, na.rm = TRUE)  
prepost$conf_ind_goal_std <- (prepost$conf_ind_goal - mean_value) / sd_value 

#OLS model
LM3.2_conf_ind <- lm(conf_ind_goal_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM3.2_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM3.2_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM3.2_conf_ind, vcov. = cov_clust, level = 0.95)


#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM3.2_conf_ind)
df <- df.residual(LM3.2_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



## conf_group_solveprob: group-efficacy when it comes to solving problems

# Making variable numeric
prepost$conf_group_solveprob <- as.numeric(as.character(prepost$conf_group_solveprob))

#Standardize:
mean_value <- mean(prepost$conf_group_solveprob, na.rm = TRUE) 
sd_value <- sd(prepost$conf_group_solveprob, na.rm = TRUE)  
prepost$conf_group_solveprob_std <- (prepost$conf_group_solveprob - mean_value) / sd_value 

#OLS model
LM3.3_conf_group <- lm(conf_group_solveprob_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM3.3_conf_group, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM3.3_conf_group, vcov. = cov_clust)
ci <- lmtest::coefci(LM3.3_conf_group, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM3.3_conf_group)
df <- df.residual(LM3.3_conf_group)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



## conf_group_goal: group-efficacy when it comes to achieving goals

# Making variable numeric
prepost$conf_group_goal <- as.numeric(as.character(prepost$conf_group_goal))

#Standardize:
mean_value <- mean(prepost$conf_group_goal, na.rm = TRUE) 
sd_value <- sd(prepost$conf_group_goal, na.rm = TRUE)  
prepost$conf_group_goal_std <- (prepost$conf_group_goal - mean_value) / sd_value 

#OLS model
LM3.4_conf_group <- lm(conf_group_goal_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM3.4_conf_group, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM3.4_conf_group, vcov. = cov_clust)
ci <- lmtest::coefci(LM3.4_conf_group, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM3.4_conf_group)
df <- df.residual(LM3.4_conf_group)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



#### Family 3 Survey Questions: Domain-Specific Efficacy/Confidence Questions
# (regarding SNAP tools and skills)


## conf_ind_scheme: Strategic planning using nonviolent action and peacebuilding

# Making variable numeric
prepost$conf_ind_scheme <- as.numeric(as.character(prepost$conf_ind_scheme))

#Standardize:
mean_value <- mean(prepost$conf_ind_scheme, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_scheme, na.rm = TRUE)  
prepost$conf_ind_scheme_std <- (prepost$conf_ind_scheme - mean_value) / sd_value 

#OLS model
LM4.1_conf_ind <- lm(conf_ind_scheme_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM4.1_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM4.1_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM4.1_conf_ind, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM4.1_conf_ind)
df <- df.residual(LM4.1_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)


## conf_ind_vision: developing a strategic vision for the movement

# Making variable numeric
prepost$conf_ind_vision <- as.numeric(as.character(prepost$conf_ind_vision))

#Standardize:
mean_value <- mean(prepost$conf_ind_vision, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_vision, na.rm = TRUE)  
prepost$conf_ind_vision_std <- (prepost$conf_ind_vision - mean_value) / sd_value 

#OLS model
LM4.2_conf_ind <- lm(conf_ind_vision_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM4.2_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM4.2_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM4.2_conf_ind, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM4.2_conf_ind)
df <- df.residual(LM4.2_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



## conf_ind_analysis: confidence in engaging in conflict analysis

# Making variable numeric
prepost$conf_ind_analysis <- as.numeric(as.character(prepost$conf_ind_analysis))

#Standardize:
mean_value <- mean(prepost$conf_ind_analysis, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_analysis, na.rm = TRUE)  
prepost$conf_ind_analysis_std <- (prepost$conf_ind_analysis - mean_value) / sd_value 

#OLS model
LM4.3_conf_ind <- lm(conf_ind_analysis_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM4.3_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM4.3_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM4.3_conf_ind, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM4.3_conf_ind)
df <- df.residual(LM4.3_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)


## conf_ind_dial_work: dialogue with own organization

# Making variable numeric
prepost$conf_ind_dial_work <- as.numeric(as.character(prepost$conf_ind_dial_work))

#Standardize:
mean_value <- mean(prepost$conf_ind_dial_work, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_dial_work, na.rm = TRUE)  
prepost$conf_ind_dial_work_std <- (prepost$conf_ind_dial_work - mean_value) / sd_value 

#OLS model
LM4.4_conf_ind <- lm(conf_ind_dial_work_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM4.4_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM4.4_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM4.4_conf_ind, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM4.4_conf_ind)
df <- df.residual(LM4.4_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)


# conf_ind_dial_allies (confidence in engaging in dialogue with allies)

# Making variable numeric
prepost$conf_ind_dial_allies <- as.numeric(as.character(prepost$conf_ind_dial_allies))

#Standardize:
mean_value <- mean(prepost$conf_ind_dial_allies, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_dial_allies, na.rm = TRUE)  
prepost$conf_ind_dial_allies_std <- (prepost$conf_ind_dial_allies - mean_value) / sd_value 

#OLS model
LM4.5_conf_ind <- lm(conf_ind_dial_allies_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM4.5_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM4.5_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM4.5_conf_ind, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM4.5_conf_ind)
df <- df.residual(LM4.5_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



# conf_ind_dial_disag: dialogue with opponents or with those whom you disagree

# Making variable numeric
prepost$conf_ind_dial_disag <- as.numeric(as.character(prepost$conf_ind_dial_disag))

#Standardize:
mean_value <- mean(prepost$conf_ind_dial_disag, na.rm = TRUE) 
sd_value <- sd(prepost$conf_ind_dial_disag, na.rm = TRUE)  
prepost$conf_ind_dial_disag_std <- (prepost$conf_ind_dial_disag - mean_value) / sd_value 

#OLS model
LM4.6_conf_ind <- lm(conf_ind_dial_disag_std ~ round, data = prepost)

#District-clustered VCOV 
cov_clust <- sandwich::vcovCL(LM4.6_conf_ind, cluster = ~ ADM2_PCODE, type = "HC1")

ct <- lmtest::coeftest(LM4.6_conf_ind, vcov. = cov_clust)
ci <- lmtest::coefci(LM4.6_conf_ind, vcov. = cov_clust, level = 0.95)

#Pull statistics
term <- "roundpost_ws"  

# Basic counts
N  <- nobs(LM4.6_conf_ind)
df <- df.residual(LM4.6_conf_ind)
G  <- length(unique(prepost$ADM2_PCODE))

est <- ct[term, "Estimate"]
se  <- ct[term, "Std. Error"]
tvl <- ct[term, "t value"]
pvl <- ct[term, "Pr(>|t|)"]
lo  <- ci[term, 1]
hi  <- ci[term, 2]

#Return a list
list(
  N = N,
  clusters = G,
  df_residual = df,
  term = term,
  estimate = est,
  se_clustered = se,
  t = tvl,
  p = pvl,
  ci95 = c(lower = lo, upper = hi)
)



###########################
#### CREATING FIGURE 2 ####
###########################

#Extract clustered estimate/CI from one model
extract_estimates <- function(m, cluster_var = "ADM2_PCODE",
                              term = "roundpost_ws", level = 0.95) {
  mf <- stats::model.frame(m)                   
  stopifnot(term %in% names(coef(m)))
  Vc <- sandwich::vcovCL(m, cluster = mf[[cluster_var]], type = "HC1")
  est <- coef(m)[term]
  se  <- sqrt(diag(Vc))[term]
  df  <- df.residual(m)
  crit <- qt(1 - (1 - level)/2, df)         
  lo <- est - crit*se
  hi <- est + crit*se
  p  <- 2*pt(-abs(est/se), df)        
  
  data.frame(term = term, estimate = est, se = se, lo = lo, hi = hi, df = df, p = p)
}

#Plot for a list of models 
create_plot <- function(model_list, title, x_labels,
                        cluster_var = "ADM2_PCODE", color = usip.blue) {
  ests <- map2_dfr(model_list, x_labels, ~{
    out <- extract_estimates(.x, cluster_var = cluster_var)
    out$Model <- .y
    out
  })
  
  
  ggplot(ests, aes(x = Model, y = estimate)) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_point(color = color) +
    geom_errorbar(aes(ymin = lo, ymax = hi), width = 0.2, color = color) +
    coord_cartesian(ylim = c(-0.5, 0.5)) +     # <── fixed y-axis range
    labs(title = title, y = "Coefficient Estimate (Std. Dev. Units)", x = NULL) +
    theme(axis.text.x = element_text(angle = 0, hjust = 0.5, size = 7))
}

# Model lists:
model_lists1 <- list(LM1.1_retainviolence, LM1.2_tact_violent, LM1.3_importanceofpeace,
                     LM1.4_mosteffect, LM1.5_successofpeace)
model_lists2 <- list(LM2.1_elite, LM2.2_street, LM2.3_negot)
model_lists3 <- list(LM3.1_conf_ind, LM3.2_conf_ind, LM3.3_conf_group, LM3.4_conf_group)
model_lists4 <- list(LM4.1_conf_ind, LM4.2_conf_ind, LM4.3_conf_ind, LM4.4_conf_ind,
                     LM4.5_conf_ind, LM4.6_conf_ind)

#Labels:
x_labels1 <- c("Retain Viol. Option", "Mov. Use Viol.", "Nonviol. Discipline",
               "Nonviol. Effective", "Nonviol. Success Global")
x_labels2 <- c("Change Impossible if Elites Power", "Protests Obstacle Negot.",
               "Negot. Cannot Address Root Causes")
x_labels3 <- c("Indiv.- Solve Prob.", "Indiv.- Goals", "Group- Solve Prob.", "Group- Goals")
x_labels4 <- c("Conflict Analysis", "Vision and Mission", "Strategic Plan Synergy",
               "Dialogue Within", "Dialogue w/Allies", "Dialogue w/Disag.")

#Build Plots:
plot1 <- create_plot(model_lists1, "Effect of SNAP on Attitudes toward Violence & NVA", x_labels1)
plot2 <- create_plot(model_lists2, "Beliefs about Change via NVA & Peacebuilding", x_labels2)
plot3 <- create_plot(model_lists3, "General Individual & Group Efficacy", x_labels3)
plot4 <- create_plot(model_lists4, "Self-Efficacy Using NVA & PB Skills", x_labels4)

#Arrange in 2x2
gridExtra::grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)




##################################################
######### MULTIPLE COMPARISON ADJUSTMENT #########
##################################################

#Clustered vcov for lm() models
cluster_vcov_lm <- function(mod, cluster_var) {
  # Build formula like ~ ADM2_PCODE
  cl_form <- reformulate(cluster_var)
  sandwich::vcovCL(mod, cluster = cl_form, type = "HC1")
}

#Extract target coefficient with clustered SE, t, p (t df) 
tidy_one_coef_clustered_exact <- function(mod, term_name, cluster_var) {
  V   <- cluster_vcov_lm(mod, cluster_var)
  bet <- coef(mod)
  if (!(term_name %in% names(bet))) {
    stop("Term not found in model: ", term_name,
         "\nAvailable terms: ", paste(names(bet), collapse = ", "))
  }
  se   <- sqrt(diag(V))[term_name]
  est  <- bet[term_name]
  tval <- est / se
  df   <- df.residual(mod)                 
  pval <- 2 * pt(abs(tval), df = df, lower.tail = FALSE)
  
  tibble(
    term      = term_name,
    estimate  = as.numeric(est),
    std.error = as.numeric(se),
    statistic = as.numeric(tval),         
    p.value   = as.numeric(pval)
  )
}

#lm() models and outcome variables for each:
models_family <- list(
  tact_violent_std           = LM1.2_tact_violent,
  perc_importanceofpeace_std = LM1.3_importanceofpeace,
  perc_mosteffect_std        = LM1.4_mosteffect,
  perc_successofpeace_std    = LM1.5_successofpeace,
  att_retainviolence_std     = LM1.1_retainviolence,
  att_elite_std              = LM2.1_elite,
  att_street_std             = LM2.2_street,
  att_negot_std              = LM2.3_negot,
  conf_ind_scheme_std        = LM4.1_conf_ind,
  conf_ind_vision_std        = LM4.2_conf_ind, 
  conf_ind_analysis_std      = LM4.3_conf_ind, 
  conf_ind_dial_work_std     = LM4.4_conf_ind, 
  conf_ind_dial_allies_std   = LM4.5_conf_ind, 
  conf_ind_dial_disag_std    = LM4.6_conf_ind,
  conf_ind_solveprob_std     = LM3.1_conf_ind,
  conf_ind_goal_std          = LM3.2_conf_ind,
  conf_group_solveprob_std   = LM3.3_conf_group, 
  conf_group_goal_std        = LM3.4_conf_group
)

#Parameters
cluster_var_name <- "ADM2_PCODE"
target_term      <- "roundpost_ws"    

#Run extraction (clustered, t-based p to match manual) 
res_raw <- bind_rows(lapply(names(models_family), function(nm) {
  tidy_one_coef_clustered_exact(models_family[[nm]],
                                term_name  = target_term,
                                cluster_var = cluster_var_name) |>
    mutate(outcome = nm)
}))

res_final <- res_raw |>
  mutate(p_raw_clustered = p.value) |>
  select(outcome, term, estimate, std.error, statistic, p_raw_clustered)

#Define families (combine individual & group efficacy) 
family_map <- c(
  # Attitudes (violence/nonviolence)
  tact_violent_std           = "Attitudes (violence/nonviolence)",
  perc_importanceofpeace_std = "Attitudes (violence/nonviolence)",
  perc_mosteffect_std        = "Attitudes (violence/nonviolence)",
  perc_successofpeace_std    = "Attitudes (violence/nonviolence)",
  att_retainviolence_std     = "Attitudes (violence/nonviolence)",
  
  # Beliefs about change
  att_elite_std   = "Beliefs about change",
  att_street_std  = "Beliefs about change",
  att_negot_std   = "Beliefs about change",
  
  # Skills/self-efficacy (tools)
  conf_ind_scheme_std      = "Skills/self-efficacy (tools)",
  conf_ind_vision_std      = "Skills/self-efficacy (tools)",
  conf_ind_analysis_std    = "Skills/self-efficacy (tools)",
  conf_ind_dial_work_std   = "Skills/self-efficacy (tools)",
  conf_ind_dial_allies_std = "Skills/self-efficacy (tools)",
  conf_ind_dial_disag_std  = "Skills/self-efficacy (tools)",
  
  # Efficacy (individual & group combined)
  conf_ind_solveprob_std   = "Efficacy (individual & group)",
  conf_ind_goal_std        = "Efficacy (individual & group)",
  conf_group_solveprob_std = "Efficacy (individual & group)",
  conf_group_goal_std      = "Efficacy (individual & group)"
  
)

#BH within family (on clustered p-values)
res_family <- res_final |>
  mutate(family = unname(family_map[outcome])) |>
  group_by(family) |>
  mutate(q_BH = p.adjust(p_raw_clustered, method = "BH")) |>
  ungroup()

#Formatting p-values: 3 decimals; if < .001 then "< 0.001" 
fmt_p <- function(p) {
  ifelse(is.na(p), NA_character_,
         ifelse(p < 0.001, "< 0.001", sprintf("%.3f", p)))
}

#Add formatted strings and present 
res_family_fmt <- res_family |>
  mutate(
    p_raw_fmt = fmt_p(p_raw_clustered),
    q_BH_fmt  = fmt_p(q_BH)
  ) |>
  select(
    family, outcome, term, estimate, std.error, statistic,
    p_raw_clustered, p_raw_fmt,
    q_BH, q_BH_fmt
  ) |>
  arrange(family, outcome)

print(res_family_fmt)



